(Gilbert et al. 2014)[https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4253859/] treated K562 cells and compared ricin treated to untreated cells to find genes that contribute to resistance and susceptibility of ricin. They also sequenced the day 0 population. In theory, if we compare the untreated population to the initial population then essential genes should be depleted among the guides. We will use this data as a baseline to compare methods, as essential genes are a well studied class of genes.

WeissmanCRISPRiRicinTreatment = read.table(file = "~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatment.txt", header = TRUE)
head(WeissmanCRISPRiRicinTreatment)
##                                                       sgId
## 1 Apoptosis+Cancer+Other_Cancer=A2M_+_9268488.25-all~e39m1
## 2 Apoptosis+Cancer+Other_Cancer=A2M_+_9268495.24-all~e39m1
## 3 Apoptosis+Cancer+Other_Cancer=A2M_+_9268513.26-all~e39m1
## 4 Apoptosis+Cancer+Other_Cancer=A2M_+_9268524.25-all~e39m1
## 5 Apoptosis+Cancer+Other_Cancer=A2M_+_9268659.23-all~e39m1
## 6 Apoptosis+Cancer+Other_Cancer=A2M_+_9268708.26-all~e39m1
##                  sequence gene T0JEV T0MH treatedJEV treatedMH
## 1  GACCAGATGGATTGTAGGGAGT  A2M   988 1114        813      1116
## 2   GTTTGGGACCAGATGGATTGT  A2M   587  592        356       526
## 3 GCCCAGTTGCTTTGGGAAGTGTT  A2M   843  909        818       721
## 4  GCAGCATAAAGCCCAGTTGCTT  A2M   572  655        848       797
## 5    GGGATTCTATTTAGCCCGCC  A2M   431  417        349       396
## 6 GTACGGTAAAAGTGAGCTCTTAC  A2M   191  159         59       248
##   untreatedJEV untreatedMH
## 1         1131        1050
## 2          670         507
## 3         1057         867
## 4          855         690
## 5          394         411
## 6          217         167
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts = WeissmanCRISPRiRicinTreatment[ ,c("sequence", "gene", "T0JEV", "T0MH", "untreatedJEV", "untreatedMH")]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts[-which(rowSums(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts[ ,3:6]) < 50), ]
write.table(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts[-which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene == "negative_control"), ], file = "~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0GeneCounts.txt", quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
write.table(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene == "negative_control"), ], file = "~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NegCtrlCounts.txt", quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Warning: package 'GenomicRanges' was built under R version 3.4.3
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Warning: package 'SummarizedExperiment' was built under R version 3.4.3
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:base':
## 
##     apply
counts = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts[, 3:6]
coldata = data.frame(condition = c(0, 0, 1, 1), rep = c(0, 1, 0, 1))
rownames(coldata) = colnames(counts)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq = DESeq2::DESeqDataSetFromMatrix(countData = counts, 
                                                                                   colData = coldata, 
                                                                                   design = ~ condition)
## the design formula contains a numeric variable with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq = DESeq2::DESeq(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
##    function: y = a/x + b, and a local regression fit was automatically substituted.
##    specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq = DESeq2::results(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq)
log2fc = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0DESeq$log2FoldChange
b = seq(from = min(log2fc) - 0.1, to = max(log2fc) + 0.1, length = 81)
negCtrl = log2fc[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene == "negative_control")]
log2fc = log2fc[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene != "negative_control")]
geneIds = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Counts$gene != "negative_control")]
geneIds = factor(geneIds, levels = unique(geneIds))
hist(negCtrl, breaks = b, probability = TRUE, col = rgb(1, 0, 0, 0.7), xlab = "log2fc", main = "negative control vs gene targetting guides")
hist(log2fc, breaks = b, probability = TRUE, add = TRUE, col = rgb(0, 1, 0, 0.7))
legend("topleft", legend = c("negative control", "gene targetting"), lwd = 0, lty = 0, pch = 16, col = c(rgb(1, 0, 0, 0.7), rgb(0, 1, 0, 0.7)))

min(negCtrl)
## [1] -5.255485
min(log2fc)
## [1] -10.99162

There is a clear enrichment of depleted guides compared to the negative control guides. Let’s apply and take a look at the results of different algorithms.

design_matrix = data.frame(Samples = rownames(coldata), baseline = c(1, 1, 1, 1), condition = coldata$condition)
write.table(design_matrix, file = "~/sgRNA/sgRNA2Groups/data/Weissman/design_matrix.txt", quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
system("~/sgRNA/sgRNA2Groups/data/mageck/mageck-0.5.6/bin/mageck mle -k ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0GeneCounts.txt -d ~/sgRNA/sgRNA2Groups/data/Weissman/design_matrix.txt --output-prefix ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle --control-sgrna ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NegCtrlCounts.txt")
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle = read.table(file = "~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.gene_summary.txt", header = TRUE)
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
##     Gene sgRNA condition.beta condition.z condition.p.value condition.fdr
## 1  RNF14    20       0.106220     4.31670         0.0012645      0.033611
## 2 DUOXA1    10       0.019616     0.58528         0.3902300      0.875420
## 3  RNF17    10       0.104460     2.98260         0.0014523      0.038095
## 4  RNF10    20       0.016656     0.68569         0.4333900      0.886710
## 5  RNF11    10       0.016568     0.48065         0.4345700      0.887640
## 6  RNF13    10       0.040640     1.17980         0.1515500      0.691240
##   condition.wald.p.value condition.wald.fdr
## 1             1.5836e-05         0.00017024
## 2             5.5836e-01         0.85116000
## 3             2.8577e-03         0.02196900
## 4             4.9291e-01         0.81930000
## 5             6.3077e-01         0.88152000
## 6             2.3810e-01         0.60645000
#hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.wald.p.value, breaks = 80, col = "grey")
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.p.value, breaks = 80, col = "grey")

system("~/sgRNA/sgRNA2Groups/data/mageck/mageck-0.5.6/bin/mageck test -k ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0GeneCounts.txt  -t T0JEV,T0MH --output-prefix ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra --control-sgrna ~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NegCtrlCounts.txt")
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra = read.table(file = "~/sgRNA/sgRNA2Groups/data/Weissman/WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra.gene_summary.txt", header = TRUE)
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra)
##         id num  neg.score neg.p.value neg.fdr neg.rank neg.goodsgrna
## 1     SPI1  10 2.4991e-17  3.1001e-07       0        1             9
## 2 C12orf66  10 2.1441e-14  3.1001e-07       0        2             8
## 3     MTF2  10 2.8904e-13  3.1001e-07       0        3            10
## 4    STAG2  25 1.5763e-12  3.1001e-07       0        4            18
## 5     BPGM  10 2.1601e-10  3.1001e-07       0        5             8
## 6   STAT5A  20 2.0139e-09  3.1001e-07       0        6            15
##    neg.lfc pos.score pos.p.value  pos.fdr pos.rank pos.goodsgrna  pos.lfc
## 1 -0.49186   0.96896   0.0014418 0.001442    15114             0 -0.49186
## 2 -0.55801   0.99838   0.0014418 0.001442    15901             0 -0.55801
## 3 -0.38311   1.00000   0.0014418 0.001442    15968             0 -0.38311
## 4 -0.22078   0.92604   0.0014418 0.001442    14231             2 -0.22078
## 5 -0.24166   0.36034   0.0014412 0.001442     6939             1 -0.24166
## 6 -0.19751   1.00000   0.0014418 0.001442    15969             0 -0.19751
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra$neg.p.value, breaks = 80, col = "grey")

hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckRra$pos.p.value, breaks = 80, col = "grey")

library(CRISPhieRmix)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix = CRISPhieRmix(x = log2fc, geneIds = geneIds, negCtrl = negCtrl, mu = -6, sigma = 2, pq = 0.05, VERBOSE = TRUE, PLOT = TRUE, nMesh = 20)

## fit negative control distributions 
## 2 groups 
## EM converged 
## mu =  -1.678681 
## sigma =  1.361389 
## pq =  0.0538104

WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores = data.frame(gene = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$genes, locfdr = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$locfdr)
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr, breaks = 80, col = "grey")

head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores[order(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr, decreasing = FALSE), ], 10)
##              gene locfdr
## ANAPC4     ANAPC4      0
## ANAPC5     ANAPC5      0
## AQR           AQR      0
## ASCC3       ASCC3      0
## ATF5         ATF5      0
## BCAS2       BCAS2      0
## CAD           CAD      0
## CASP8AP2 CASP8AP2      0
## CDC20       CDC20      0
## CDC27       CDC27      0
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix = CRISPhieRmix(x = log2fc, geneIds = geneIds, mu = -6, sigma = 2, pq = 0.05, VERBOSE = TRUE, PLOT = TRUE, nMesh = 20)
## no negative controls provided, fitting hierarchical normal model
## Loading required package: mixtools
## mixtools package, version 1.1.0, Released 2017-03-10
## This package is based upon work supported by the National Science Foundation under Grant No. SES-0518772.
## number of iterations= 20

WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores = data.frame(gene = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix$genes, locfdr = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix$locfdr)
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr, breaks = 80, col = "grey")

head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores[order(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr, decreasing = FALSE), ], 10)
##              gene locfdr
## AATF         AATF      0
## ABCA3       ABCA3      0
## ABCB10     ABCB10      0
## ABTB1       ABTB1      0
## ACTG1       ACTG1      0
## ACTR8       ACTR8      0
## ADAMTSL3 ADAMTSL3      0
## ADCY3       ADCY3      0
## ADCY4       ADCY4      0
## ADSS         ADSS      0
MannWhitneyPvals = sapply(unique(geneIds), function(g) return(wilcox.test(x = log2fc[which(geneIds == g)], negCtrl, paired = FALSE)$p.value))
names(MannWhitneyPvals) = unique(geneIds)
hist(MannWhitneyPvals, breaks = 40, col = "grey")

MannWhitneyFdrs = p.adjust(MannWhitneyPvals, method = "BH")
head(sort(MannWhitneyFdrs, decreasing = FALSE), 20)
##        RPL17        ASNA1        RPL31         LSM6        COPB1 
## 9.615331e-12 1.675838e-11 2.735450e-11 4.450390e-11 5.259081e-11 
##        SNRPF     HSD17B10        HSPA9       RPL10A          AQR 
## 1.488890e-10 2.782676e-10 3.048158e-10 5.597503e-10 7.115365e-10 
##         NOP2      SNRNP70       TANGO6        LAS1L       CLNS1A 
## 7.115365e-10 7.228967e-10 7.228967e-10 7.869923e-10 9.601214e-10 
##       INCENP        CYB5B        NRDE2        KPNB1        PSMA1 
## 9.872310e-10 1.417373e-09 1.429930e-09 2.199799e-09 2.510920e-09
top3MannWhitneyPval <- function(x, negCtrl){
  top3x = head(sort(x, decreasing = FALSE), 3)
  topNegCtrl = head(sort(negCtrl, decreasing = FALSE), round(3*length(negCtrl)/length(x)))
  return(wilcox.test(top3x, topNegCtrl, paired = FALSE)$p.value)
}
top3MannWhitneyPvals = sapply(unique(geneIds), function(g) return(top3MannWhitneyPval(x = log2fc[which(geneIds == g)], negCtrl = negCtrl)))
names(top3MannWhitneyPvals) = unique(geneIds)
top3MannWhitneyFdrs = p.adjust(top3MannWhitneyPvals, method = "BH")
head(sort(top3MannWhitneyFdrs, decreasing = FALSE), 20)
##       AATF      ACTR8    ADCYAP1       AMBP    ANAPC13     ANAPC1 
## 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475 
##     ANAPC4     ANAPC5        AQR     ARGLU1      ASCC3       ATF5 
## 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475 
##       ATG3   ATP6V1G1     ATXN10       BANP      BCAS2      BCCIP 
## 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475 0.04193475 
##    BCL2L14       BCL6 
## 0.04193475 0.04193475

We can use the essential genes from Evers et al. 2016 to estimate proxy measures for false positive rates and false negative rates.

dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores)
## [1] 15975     2
EssentialGenes = read.table(file = "~/sgRNA/sgRNA2Groups/data/Evers2016/EssentialGenes.txt", header = TRUE)
head(EssentialGenes)
##    gene essential
## 1 RPS24         1
## 2 RPL11         1
## 3 PSMA3         1
## 4 RPL34         1
## 5  RPS8         1
## 6 RPL30         1
dim(EssentialGenes)
## [1] 93  2
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores[match(EssentialGenes$gene, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$gene), ]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene)), ]

WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = data.frame(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential, essential = EssentialGenes$essential[which(EssentialGenes$gene %in% WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene)])
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
## [1] 67  3
library(ggjoy)
## Loading required package: ggplot2
## Loading required package: ggridges
## The ggjoy package has been deprecated. Please switch over to the
## ggridges package, which provides the same functionality. Porting
## guidelines can be found here:
## https://github.com/clauswilke/ggjoy/blob/master/README.md
library(ggplot2)
b = seq(from = 0, to = 1, length = 41)
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential == 0)], breaks = b,  col = rgb(1, 0, 0, 0.7), main = "essential vs nonessential genes", xlab = "locfdr", ylim = c(0, 40))
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential == 1)], breaks = b,  col = rgb(0, 1, 0, 0.7), add = TRUE)

estimatedFdr = sapply(1:dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)[1], function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr[i])]))

1 - sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential[which(estimatedFdr < 0.2)])/sum(estimatedFdr < 0.2)
## [1] 0
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:IRanges':
## 
##     cov, var
## The following objects are masked from 'package:S4Vectors':
## 
##     cov, var
## The following object is masked from 'package:BiocGenerics':
## 
##     var
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc = roc(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc
## 
## Call:
## roc.default(response = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential,     predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
## 
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr in 21 controls (WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential 0) > 46 cases (WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$essential 1).
## Area under the curve: 0.9772
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle[match(EssentialGenes$gene, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$Gene), ]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene)), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## [1] 67  8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
##        Gene sgRNA condition.beta condition.z condition.p.value
## 10683 RPS24    20       -1.03990    -38.0170        0.0000e+00
## 5703  RPL11    20       -0.61181    -25.2040        5.0078e-05
## 8747  PSMA3    20       -0.62255    -23.5930        3.7559e-05
## 13912 RPL34    20       -0.67810    -27.0830        1.2520e-05
## 11850  RPS8    20       -0.11783     -4.9472        2.8248e-01
## 5111  RPL30    20       -0.59654    -23.4070        6.2598e-05
##       condition.fdr condition.wald.p.value condition.wald.fdr
## 10683    0.00000000             0.0000e+00         0.0000e+00
## 5703     0.00225350            3.6258e-140        2.7321e-138
## 8747     0.00174420            4.5397e-123        3.0600e-121
## 13912    0.00059524            1.5500e-161        1.3989e-159
## 11850    0.83158000             7.5299e-07         9.0785e-06
## 5111     0.00268100            3.6412e-121        2.4236e-119
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = data.frame(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential, essential = EssentialGenes$essential[which(EssentialGenes$gene %in% WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene)])
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## [1] 67  9
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
##        Gene sgRNA condition.beta condition.z condition.p.value
## 10683 RPS24    20       -1.03990    -38.0170        0.0000e+00
## 5703  RPL11    20       -0.61181    -25.2040        5.0078e-05
## 8747  PSMA3    20       -0.62255    -23.5930        3.7559e-05
## 13912 RPL34    20       -0.67810    -27.0830        1.2520e-05
## 11850  RPS8    20       -0.11783     -4.9472        2.8248e-01
## 5111  RPL30    20       -0.59654    -23.4070        6.2598e-05
##       condition.fdr condition.wald.p.value condition.wald.fdr essential
## 10683    0.00000000             0.0000e+00         0.0000e+00         1
## 5703     0.00225350            3.6258e-140        2.7321e-138         1
## 8747     0.00174420            4.5397e-123        3.0600e-121         1
## 13912    0.00059524            1.5500e-161        1.3989e-159         1
## 11850    0.83158000             7.5299e-07         9.0785e-06         1
## 5111     0.00268100            3.6412e-121        2.4236e-119         1
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.p.value[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential == 0)], breaks = b,  col = rgb(1, 0, 0, 0.7), main = "essential vs nonessential genes", xlab = "permutation p-value", ylim = c(0, 40))
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.p.value[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential == 1)], breaks = b, col = rgb(0, 1, 0, 0.7), add = TRUE)

#hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.p.value[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential == 0)], breaks = b, col = rgb(1, 0, 0, 0.7), main = "essential vs nonessential genes", xlab = "wald p-value", ylim = c(0, 50))
#hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.p.value[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential == 1)], breaks = b, col = rgb(0, 1, 0, 0.7), add = TRUE)

WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc = roc(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc
## 
## Call:
## roc.default(response = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential,     predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
## 
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr in 21 controls (WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential 0) > 46 cases (WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential 1).
## Area under the curve: 0.9586
1 - sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)
## [1] 0
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc = roc(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr)
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc

#1 - sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)

The performance of all is similar. They all pretty much correctly identify the essential genes. To get better estimates we’ll take the list of essential genes directly from Hart et al 2015.

ConstitutiveCoreEssentialGenes = scan("~/sgRNA/sgRNA2Groups/data/Weissman/ConstitutiveCoreEssentialGenes.txt", what = character())
length(ConstitutiveCoreEssentialGenes)
## [1] 217
head(ConstitutiveCoreEssentialGenes)
## [1] "ALYREF"   "AQR"      "ARCN1"    "BRIX1"    "C12orf66" "CCT3"
tail(ConstitutiveCoreEssentialGenes)
## [1] "XAB2"   "XPO1"   "YY1"    "ZBTB48" "ZC3H13" "ZNF207"
NonEssentialGenes = scan("~/sgRNA/sgRNA2Groups/data/Weissman/NonEssentialGenes.txt", what = character())
EssentialGenes = data.frame(genes = factor(c(sapply(ConstitutiveCoreEssentialGenes, toString), sapply(NonEssentialGenes, toString))), essential = c(rep(1, times = length(ConstitutiveCoreEssentialGenes)), rep(0, times = length(NonEssentialGenes))))
dim(EssentialGenes)
## [1] 1144    2
head(EssentialGenes)
##             genes essential
## ALYREF     ALYREF         1
## AQR           AQR         1
## ARCN1       ARCN1         1
## BRIX1       BRIX1         1
## C12orf66 C12orf66         1
## CCT3         CCT3         1
tail(EssentialGenes)
##           genes essential
## ZNF679   ZNF679         0
## ZNF804B ZNF804B         0
## ZNRF4     ZNRF4         0
## ZP2         ZP2         0
## ZP4         ZP4         0
## ZSWIM2   ZSWIM2         0
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$genes %in% ConstitutiveCoreEssentialGenes)
## [1] 217
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$genes %in% NonEssentialGenes)
## [1] 460
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores[match(EssentialGenes$genes, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$gene), ]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene)), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
## [1] 677   2
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
##              gene     locfdr
## ALYREF     ALYREF 0.01962154
## AQR           AQR 0.00000000
## ARCN1       ARCN1 0.00000000
## BRIX1       BRIX1 0.97280497
## C12orf66 C12orf66 0.09600878
## CCT3         CCT3 0.02372761
EssentialGenes = EssentialGenes[which(EssentialGenes$genes %in% WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene), ]
dim(EssentialGenes)
## [1] 677   2
head(EssentialGenes)
##             genes essential
## ALYREF     ALYREF         1
## AQR           AQR         1
## ARCN1       ARCN1         1
## BRIX1       BRIX1         1
## C12orf66 C12orf66         1
## CCT3         CCT3         1
library(pROC)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc
## 
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
## 
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr in 460 controls (EssentialGenes$essential 0) > 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.9104
estimatedFdr = sapply(1:dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)[1], function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr[i])]))
AllEstimatedFdr = sapply(1:length(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr), function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[i])]))
length(estimatedFdr)
## [1] 677
head(estimatedFdr)
## [1] 0.0005734193 0.0000000000 0.0000000000 0.8374039915 0.0070784015
## [6] 0.0008534623
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
##              gene     locfdr
## ALYREF     ALYREF 0.01962154
## AQR           AQR 0.00000000
## ARCN1       ARCN1 0.00000000
## BRIX1       BRIX1 0.97280497
## C12orf66 C12orf66 0.09600878
## CCT3         CCT3 0.02372761
1 - sum(EssentialGenes$essential[which(estimatedFdr < 0.1)])/sum(estimatedFdr < 0.1)
## [1] 0.01149425
sum(estimatedFdr < 0.1)
## [1] 174
sum(EssentialGenes$essential[which(estimatedFdr < 0.1)])
## [1] 172
1 - sum(EssentialGenes$essential[which(estimatedFdr < 0.2)])/sum(estimatedFdr < 0.2)
## [1] 0.02762431
sum(estimatedFdr < 0.2)
## [1] 181
sum(EssentialGenes$essential[which(estimatedFdr < 0.2)])
## [1] 176
sum(AllEstimatedFdr < 0.1)
## [1] 1425
sum(AllEstimatedFdr < 0.2)
## [1] 1853
max(estimatedFdr[which(EssentialGenes$essential == 1)])
## [1] 0.8458869
hist(estimatedFdr, breaks = 100, col = "grey")

fdr.curve <- function(thresh, fdrs, baseline){
  w = which(fdrs < thresh)
  if(length(w) > 0){
    return(sum(1 - baseline[w])/length(w))
  }
  else{
    return(NA)
  }
}
s = seq(from = 0, to = 1, length = 1001)

f = sapply(s, function(t) fdr.curve(t, estimatedFdr, EssentialGenes$essential))
plot(c(0, s[!is.na(f)]), c(0, f[!is.na(f)]), type = "l", xlab = "estimated FDR", ylab = "empirical FDR", main = "CRISPhieRmix estimated FDR", xlim = c(0, 1), ylim = c(0, 1), lwd = 2, col = "dodgerblue")
abline(0, 1, lty = 2)

dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
## [1] 15975     8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
##     Gene sgRNA condition.beta condition.z condition.p.value condition.fdr
## 1  RNF14    20       0.106220     4.31670         0.0012645      0.033611
## 2 DUOXA1    10       0.019616     0.58528         0.3902300      0.875420
## 3  RNF17    10       0.104460     2.98260         0.0014523      0.038095
## 4  RNF10    20       0.016656     0.68569         0.4333900      0.886710
## 5  RNF11    10       0.016568     0.48065         0.4345700      0.887640
## 6  RNF13    10       0.040640     1.17980         0.1515500      0.691240
##   condition.wald.p.value condition.wald.fdr
## 1             1.5836e-05         0.00017024
## 2             5.5836e-01         0.85116000
## 3             2.8577e-03         0.02196900
## 4             4.9291e-01         0.81930000
## 5             6.3077e-01         0.88152000
## 6             2.3810e-01         0.60645000
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle[match(EssentialGenes$genes, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$Gene), ]
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene)), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## [1] 677   8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
##           Gene sgRNA condition.beta condition.z condition.p.value
## 6506    ALYREF    10      -0.272860     -7.9535         0.0236490
## 8102       AQR    20      -1.163200    -38.7860         0.0000000
## 4658     ARCN1    20      -0.889430    -33.6890         0.0000000
## 15106    BRIX1    20      -0.027779     -1.1841         0.9162200
## 5560  C12orf66    10       0.302800      8.7749         0.0000000
## 7228      CCT3    10      -0.385800    -11.2170         0.0037058
##       condition.fdr condition.wald.p.value condition.wald.fdr
## 6506       0.287080             1.8136e-15         3.0788e-14
## 8102       0.000000             0.0000e+00         0.0000e+00
## 4658       0.000000            8.3883e-249        1.5228e-246
## 15106      0.988650             2.3637e-01         6.0484e-01
## 5560       0.000000             1.7102e-18         3.1512e-17
## 7228       0.079892             3.3534e-29         7.5028e-28
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc
## 
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
## 
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr in 460 controls (EssentialGenes$essential 0) > 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.8652
1 - sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)
## [1] 0.02752294
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)
## [1] 109
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.1)
## [1] 795
sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)])
## [1] 106
1 - sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)
## [1] 0.04545455
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)
## [1] 132
sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)])
## [1] 126
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.2)
## [1] 1024
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr, breaks = 100, col = "grey")

f = sapply(s, function(t) fdr.curve(t, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr, EssentialGenes$essential))
plot(c(0, s[!is.na(f)]), c(0, f[!is.na(f)]), type = "l", xlab = "estimated FDR", ylab = "empirical FDR", main = "MAGeCK permutation FDR", xlim = c(0, 1), ylim = c(0, 1), lwd = 2, col = "deeppink")
abline(0, 1, lty = 2)

max(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr[which(EssentialGenes$essential == 1)])
## [1] 0.99795
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc
## 
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr)
## 
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr in 460 controls (EssentialGenes$essential 0) > 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.9434
1 - sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.1)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.1)
## [1] 0.05154639
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.1)
## [1] 194
1 - sum(EssentialGenes$essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)])/sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)
## [1] 0.09268293
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)
## [1] 205
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr, breaks = 100, col = "grey")

f = sapply(s, function(t) fdr.curve(t, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr, EssentialGenes$essential))
plot(c(0, s[!is.na(f)]), c(0, f[!is.na(f)]), type = "l", xlab = "estimated FDR", ylab = "empirical FDR", main = "MAGeCK Wald FDR", xlim = c(0, 1), ylim = c(0, 1), lwd = 2, col = "darkviolet")
abline(0, 1, lty = 2)

sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix$genes %in% ConstitutiveCoreEssentialGenes)
## [1] 217
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix$genes %in% NonEssentialGenes)
## [1] 460
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores[match(EssentialGenes$genes, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$gene), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential)
## [1] 677   2
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential)
##              gene       locfdr
## ALYREF     ALYREF 0.000000e+00
## AQR           AQR 0.000000e+00
## ARCN1       ARCN1 0.000000e+00
## BRIX1       BRIX1 9.704430e-01
## C12orf66 C12orf66 2.220446e-16
## CCT3         CCT3 0.000000e+00
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$locfdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc
## 
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$locfdr)
## 
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$locfdr in 460 controls (EssentialGenes$essential 0) > 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.9147
NormalEstimatedFdr = sapply(1:dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential)[1], function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$locfdr[i])]))

NormalAllEstimatedFdr = sapply(1:dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores)[1], function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr[i])]))

sum(NormalAllEstimatedFdr < 0.1)
## [1] 4476
1 - sum(EssentialGenes$essential[which(NormalEstimatedFdr < 0.1)])/sum(NormalEstimatedFdr < 0.1)
## [1] 0.2139918
sum(EssentialGenes$essential[which(NormalEstimatedFdr < 0.1)])
## [1] 191
sum(1 - EssentialGenes$essential[which(NormalEstimatedFdr < 0.1)])
## [1] 52
sum(NormalEstimatedFdr < 0.1)
## [1] 243
sum(NormalAllEstimatedFdr < 0.2)
## [1] 5258
1 - sum(EssentialGenes$essential[which(NormalEstimatedFdr < 0.2)])/sum(NormalEstimatedFdr < 0.2)
## [1] 0.2788104
sum(EssentialGenes$essential[which(NormalEstimatedFdr < 0.2)])
## [1] 194
sum(1 - EssentialGenes$essential[which(NormalEstimatedFdr < 0.2)])
## [1] 75
sum(NormalEstimatedFdr < 0.2)
## [1] 269
hist(estimatedFdr, breaks = 100, col = "grey")

f = sapply(s, function(t) fdr.curve(t, NormalEstimatedFdr, EssentialGenes$essential))
plot(c(0, s[!is.na(f)]), c(0, f[!is.na(f)]), type = "l", xlab = "estimated FDR", ylab = "empirical FDR", main = "Normal hierarchical mixture estimated FDR", xlim = c(0, 1), ylim = c(0, 1), lwd = 2, col = "darkblue")
abline(0, 1, lty = 2)

dim(EssentialGenes)
## [1] 677   2
sum(EssentialGenes$essential)
## [1] 217
MannWhitneyFdrs.essential = MannWhitneyFdrs[which(names(MannWhitneyFdrs) %in% EssentialGenes$gene)]
hist(MannWhitneyFdrs.essential, breaks = seq(from = 0, to = 1, length = 101), col = "grey")

sum(MannWhitneyFdrs < 0.1)
## [1] 1383
sum(MannWhitneyFdrs.essential < 0.1)
## [1] 148
sum(MannWhitneyFdrs.essential < 0.1 & EssentialGenes$essential == 1)
## [1] 52
sum(MannWhitneyFdrs < 0.2)
## [1] 1894
sum(MannWhitneyFdrs.essential < 0.2)
## [1] 166
sum(MannWhitneyFdrs.essential < 0.2 & EssentialGenes$essential == 1)
## [1] 58
MannWhitneyFdrs.essential.roc = roc(EssentialGenes$essential, MannWhitneyFdrs.essential)
MannWhitneyFdrs.essential.roc
## 
## Call:
## roc.default(response = EssentialGenes$essential, predictor = MannWhitneyFdrs.essential)
## 
## Data: MannWhitneyFdrs.essential in 460 controls (EssentialGenes$essential 0) < 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.5136
length(intersect(names(MannWhitneyFdrs.essential)[which(MannWhitneyFdrs.essential < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]))
## [1] 0
top3MannWhitneyFdrs.essential = top3MannWhitneyFdrs[which(names(top3MannWhitneyFdrs) %in% EssentialGenes$gene)]

hist(top3MannWhitneyFdrs.essential, breaks = seq(from = 0, to = 1, length = 101), col = "grey")

sum(top3MannWhitneyFdrs < 0.1)
## [1] 3550
sum(top3MannWhitneyFdrs.essential < 0.1)
## [1] 227
sum(top3MannWhitneyFdrs.essential < 0.1 & EssentialGenes$essential == 1)
## [1] 78
sum(top3MannWhitneyFdrs < 0.2)
## [1] 5342
sum(top3MannWhitneyFdrs.essential < 0.2)
## [1] 291
sum(top3MannWhitneyFdrs.essential < 0.2 & EssentialGenes$essential == 1)
## [1] 95
top3MannWhitneyFdrs.essential.roc = roc(EssentialGenes$essential, top3MannWhitneyFdrs.essential)
top3MannWhitneyFdrs.essential.roc
## 
## Call:
## roc.default(response = EssentialGenes$essential, predictor = top3MannWhitneyFdrs.essential)
## 
## Data: top3MannWhitneyFdrs.essential in 460 controls (EssentialGenes$essential 0) > 217 cases (EssentialGenes$essential 1).
## Area under the curve: 0.5176
length(intersect(names(top3MannWhitneyFdrs.essential)[which(top3MannWhitneyFdrs.essential < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]))
## [1] 0
length(intersect(names(MannWhitneyFdrs.essential)[which(MannWhitneyFdrs.essential < 0.1)], intersect(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)])))
## [1] 0
cols = RColorBrewer::brewer.pal(8, "Set1")
plot(top3MannWhitneyFdrs.essential.roc, col = cols[1], lwd = 2, lty = 7, xlim = c(0, 1), ylim = c(0, 1), main = "ROC curves")
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc, col = cols[2], lwd = 2, lty = 4)
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc, col = cols[3], lwd = 2, lty = 5)
lines(MannWhitneyFdrs.essential.roc, col = cols[4], lwd = 2, lty = 6)
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc, col = cols[5], lwd = 2, lty = 2)
legend("bottomleft", legend = c(paste0("Mann-Whitney of top3 guides (AUC = ", round(top3MannWhitneyFdrs.essential.roc$auc, digits = 2), ")"), paste0("Mann-Whitney (AUC = ", round(MannWhitneyFdrs.essential.roc$auc, digits = 2), ")"), paste0("MAGeCK MLE (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc$auc, digits = 2), ")"),  paste0("CRISPhieRmix (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc$auc, digits = 2), ")"), paste0("normal hier mix (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc$auc, digits = 2), ")")), lty = c(7, 6, 2, 4, 5), lwd = 2, col = c("darkgreen", "chartreuse", "deeppink", "dodgerblue", "darkblue"))

pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/CRISPRiUntreated2day0RocCurves.pdf')
plot(top3MannWhitneyFdrs.essential.roc, col = "darkgreen", lwd = 2, lty = 7, xlim = c(0, 1), ylim = c(0, 1), main = "ROC curves")
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc, col = "dodgerblue", lwd = 2, lty = 4)
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc, col = "darkblue", lwd = 2, lty = 5)
lines(MannWhitneyFdrs.essential.roc, col = "chartreuse", lwd = 2, lty = 6)
lines(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc, col = "deeppink", lwd = 2, lty = 2)
legend("bottomleft", legend = c(paste0("Mann-Whitney of top3 guides (AUC = ", round(top3MannWhitneyFdrs.essential.roc$auc, digits = 2), ")"), paste0("Mann-Whitney (AUC = ", round(MannWhitneyFdrs.essential.roc$auc, digits = 2), ")"), paste0("MAGeCK MLE (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc$auc, digits = 2), ")"),  paste0("CRISPhieRmix (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc$auc, digits = 2), ")"), paste0("normal hier mix (AUC = ", round(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc$auc, digits = 2), ")")), lty = c(7, 6, 2, 4, 5), lwd = 2, col = c("darkgreen", "chartreuse", "deeppink",  "dodgerblue", "darkblue"))
dev.off()
## quartz_off_screen 
##                 2
b = seq(from = min(log2fc) - 0.1, to = max(log2fc) + 0.1, length = 201)
hist(log2fc, breaks = 80, probability = TRUE, main = "mixture fit to observations")
mixFit = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$mixFit
lines(b, mixFit$pq*dnorm(b, mixFit$mu, mixFit$sigma), lwd = 2, col = "darkgreen")
lines(b, (1 - mixFit$pq)*sn::dst(b, dp = mixFit$skewtFit$dp), col = "red", lwd  = 2)
lines(b, mixFit$pq*dnorm(b, mixFit$mu, mixFit$sigma) + (1 - mixFit$pq)*sn::dst(b, dp = mixFit$skewtFit$dp), col = "darkviolet", lty = 2, lwd = 2)

pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/CRISPRiUntreated2day0CRISPhieRmixFit.pdf')
b = seq(from = min(log2fc) - 0.1, to = max(log2fc) + 0.1, length = 201)
hist(log2fc, breaks = 80, probability = TRUE, main = "mixture fit to observations")
mixFit = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$mixFit
lines(b, mixFit$pq*dnorm(b, mixFit$mu, mixFit$sigma), lwd = 2, col = "darkgreen")
lines(b, (1 - mixFit$pq)*sn::dst(b, dp = mixFit$skewtFit$dp), col = "red", lwd  = 2)
lines(b, mixFit$pq*dnorm(b, mixFit$mu, mixFit$sigma) + (1 - mixFit$pq)*sn::dst(b, dp = mixFit$skewtFit$dp), col = "darkviolet", lty = 2, lwd = 2)
legend("topleft", legend = c("null fit", "dropout fit", "combined fit"), lty = c(1, 1, 2), col = c("darkgreen", "red", "darkviolet"))
dev.off()
## quartz_off_screen 
##                 2
mageckGenes = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1 & EssentialGenes$essential == 1)]
length(mageckGenes)
## [1] 106
CRISPhieRmixGenes = EssentialGenes$gene[which(estimatedFdr < 0.1 & EssentialGenes$essential == 1)]
length(CRISPhieRmixGenes)
## [1] 172
length(intersect(mageckGenes, CRISPhieRmixGenes))
## [1] 105
GenesInCommon = intersect(mageckGenes, CRISPhieRmixGenes)
b = seq(from = min(log2fc) - 0.1, to = max(log2fc) + 0.1, length = 81)
hist(log2fc[which(geneIds %in% GenesInCommon)], breaks = b, col = "grey", probability = TRUE)

pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/GenesInCommonHist.pdf')
hist(log2fc[which(geneIds %in% GenesInCommon)], breaks = b, col = "grey", probability = TRUE)
dev.off()
## quartz_off_screen 
##                 2
GenesCRISPhieRspecific = setdiff(CRISPhieRmixGenes, GenesInCommon)
length(GenesCRISPhieRspecific)
## [1] 67
hist(log2fc[which(geneIds %in% GenesCRISPhieRspecific)], breaks = b, col = "grey", probability = TRUE)

pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/GenesCRISPhieRspecificHist.pdf')
hist(log2fc[which(geneIds %in% GenesCRISPhieRspecific)], breaks = b, col = "grey", probability = TRUE)
dev.off()
## quartz_off_screen 
##                 2
hist(negCtrl, breaks = b, probability = TRUE, col = rgb(1, 0, 0, 0.6), xlab = "log2fc")
hist(log2fc[which(geneIds == GenesInCommon[1])], breaks = b, probability = TRUE, col = rgb(0, 1, 0, 0.8), add = TRUE)
hist(log2fc[which(geneIds == GenesCRISPhieRspecific[1])], breaks = b, probability = TRUE, col = rgb(0, 0, 1, 0.8), add = TRUE)
legend("topleft", legend = c("negative control", GenesInCommon[1], GenesCRISPhieRspecific[1]), pch = 15, col = c(rgb(1, 0, 0, 0.6), rgb(0, 1, 0, 0.8), rgb(0, 0, 1, 0.8)))

y = data.frame(gene = c(rep("negCtrl", times = length(negCtrl)), rep(toString(GenesInCommon[1]), times = length(which(geneIds == GenesInCommon[1]))), rep(toString(GenesCRISPhieRspecific[1]), times = length(which(geneIds == GenesCRISPhieRspecific[1])))), log2fc = c(negCtrl, log2fc[which(geneIds == GenesInCommon[1])], log2fc[which(geneIds == GenesCRISPhieRspecific[1])]))
library(ggplot2)
library(ggjoy)
ggplot(y, aes(x = log2fc, y = gene)) + geom_joy(scale = 0.8) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) 
## Picking joint bandwidth of 0.309

pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/GenesCRISPhieRspecificHistExampleGenes.pdf')
ggplot(y, aes(x = log2fc, y = gene)) + geom_joy(scale = 0.8) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) 
## Picking joint bandwidth of 0.309
dev.off()
## quartz_off_screen 
##                 2
y = data.frame(gene = c(rep("negCtrl", times = length(negCtrl)), rep("Genes In Common", times = length(which(geneIds %in% GenesInCommon))), rep("CRISPhieRspecific", times = length(which(geneIds %in% GenesCRISPhieRspecific)))), log2fc = c(negCtrl, log2fc[which(geneIds %in% GenesInCommon)], log2fc[which(geneIds %in% GenesCRISPhieRspecific)]))
ggplot(y, aes(x = log2fc, y = gene)) + geom_joy(scale = 0.9) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) 
## Picking joint bandwidth of 0.144

pdf('~/sgRNA/sgRNA2Groups/tex_writing/figures/Gilbert2014/GenesInCommonVsCRISPhieRspecificJoyplot.pdf')
ggplot(y, aes(x = log2fc, y = gene)) + geom_joy(scale = 0.9) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black")) 
## Picking joint bandwidth of 0.144
dev.off()
## quartz_off_screen 
##                 2
# 2017 essential genes
EssentialGenes = read.table("~/Downloads/TableS2.txt")
EssentialGenes = EssentialGenes[,1]
length(EssentialGenes)
## [1] 684
NonEssentialGenes = scan("~/sgRNA/sgRNA2Groups/data/Weissman/NonEssentialGenes.txt", what = character())
length(NonEssentialGenes)
## [1] 927
EssentialGenes = data.frame(gene = c(sapply(EssentialGenes, toString), sapply(NonEssentialGenes, toString)), essential = c(rep(1, times = length(EssentialGenes)), rep(0, times = length(NonEssentialGenes))))
dim(EssentialGenes)
## [1] 1611    2
head(EssentialGenes)
##     gene essential
## 1   AARS         1
## 2  ABCE1         1
## 3  ABCF1         1
## 4   ACTB         1
## 5 ACTL6A         1
## 6 ACTR10         1
tail(EssentialGenes)
##         gene essential
## 1606  ZNF679         0
## 1607 ZNF804B         0
## 1608   ZNRF4         0
## 1609     ZP2         0
## 1610     ZP4         0
## 1611  ZSWIM2         0
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix.estimatedFdr = sapply(1:length(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr), function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[i])]) )
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores = data.frame(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores, estimatedFdr = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix.estimatedFdr)

sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmix$genes %in% EssentialGenes$gene)
## [1] 1139
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores[match(EssentialGenes$gene, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$gene), ]
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene)), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
## [1] 1139    3
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential)
##          gene       locfdr estimatedFdr
## AARS     AARS 0.000000e+00 0.000000e+00
## ABCE1   ABCE1 5.223723e-01 1.450369e-01
## ABCF1   ABCF1 2.146361e-01 2.867480e-02
## ACTB     ACTB 9.398072e-01 5.846380e-01
## ACTL6A ACTL6A 0.000000e+00 0.000000e+00
## ACTR10 ACTR10 6.182854e-11 2.660263e-12
EssentialGenes = EssentialGenes[which(EssentialGenes$gene %in% WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene), ]
dim(EssentialGenes)
## [1] 1139    2
head(EssentialGenes)
##     gene essential
## 1   AARS         1
## 2  ABCE1         1
## 3  ABCF1         1
## 4   ACTB         1
## 5 ACTL6A         1
## 6 ACTR10         1
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)
## [1] 436
length(which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1 & EssentialGenes$essential == 1))
## [1] 434
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$estimatedFdr < 0.1)
## [1] 1425
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.2)
## [1] 482
length(which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.2 & EssentialGenes$essential == 1))
## [1] 477
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr, breaks = seq(from = 0, to = 1, length = 101), col = "grey")

WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential.roc
## 
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr)
## 
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$locfdr in 460 controls (EssentialGenes$essential 0) > 679 cases (EssentialGenes$essential 1).
## Area under the curve: 0.8821
fdr.curve <- function(thresh, fdrs, baseline){
  w = which(fdrs < thresh)
  if(length(w) > 0){
    return(sum(1 - baseline[w])/length(w))
  }
  else{
    return(NA)
  }
}
s = seq(from = 0, to = 1, length = 1001)

dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
## [1] 15975     8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
##     Gene sgRNA condition.beta condition.z condition.p.value condition.fdr
## 1  RNF14    20       0.106220     4.31670         0.0012645      0.033611
## 2 DUOXA1    10       0.019616     0.58528         0.3902300      0.875420
## 3  RNF17    10       0.104460     2.98260         0.0014523      0.038095
## 4  RNF10    20       0.016656     0.68569         0.4333900      0.886710
## 5  RNF11    10       0.016568     0.48065         0.4345700      0.887640
## 6  RNF13    10       0.040640     1.17980         0.1515500      0.691240
##   condition.wald.p.value condition.wald.fdr
## 1             1.5836e-05         0.00017024
## 2             5.5836e-01         0.85116000
## 3             2.8577e-03         0.02196900
## 4             4.9291e-01         0.81930000
## 5             6.3077e-01         0.88152000
## 6             2.3810e-01         0.60645000
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle[match(EssentialGenes$gene, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$Gene), ]
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential[-which(is.na(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene)), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
## [1] 1139    8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential)
##         Gene sgRNA condition.beta condition.z condition.p.value
## 8083    AARS    20      -1.105700   -43.18500        0.00000000
## 292    ABCE1    10      -0.199890    -6.03720        0.07482900
## 3584   ABCF1     9      -0.119750    -3.10410        0.27434000
## 6474    ACTB    10      -0.023119    -0.67452        0.97137000
## 15690 ACTL6A    20      -0.538490   -21.99600        0.00021283
## 4278  ACTR10     9      -0.800890   -19.66300        0.00000000
##       condition.fdr condition.wald.p.value condition.wald.fdr
## 8083      0.0000000             0.0000e+00         0.0000e+00
## 292       0.5335100             1.5677e-09         2.1929e-08
## 3584      0.8248800             1.9086e-03         1.5192e-02
## 6474      0.9955800             4.9998e-01         8.2292e-01
## 15690     0.0076923            3.1238e-107        1.8146e-105
## 4278      0.0000000             4.4543e-86         2.0273e-84
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)
## [1] 264
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.1)
## [1] 795
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1 & EssentialGenes$essential == 1)
## [1] 261
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2)
## [1] 300
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.2)
## [1] 1024
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.2 & EssentialGenes$essential == 1)
## [1] 294
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr, breaks = seq(from = 0, to = 1, length = 101), col = "grey")

WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.perm.roc
## 
## Call:
## roc.default(response = EssentialGenes$essential, predictor = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr)
## 
## Data: WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr in 460 controls (EssentialGenes$essential 0) > 679 cases (EssentialGenes$essential 1).
## Area under the curve: 0.7933
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.1)
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.wald.fdr < 0.1)
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.1 & EssentialGenes$essential == 1)
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2)
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.wald.fdr < 0.2)
#sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr < 0.2 & EssentialGenes$essential == 1)
#hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr, breaks = seq(from = 0, to = 1, length = 101), col = "grey")
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.wald.fdr)
#WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential.wald.roc

length(intersect(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]))
## [1] 261
MannWhitneyFdrs.essential = MannWhitneyFdrs[which(names(MannWhitneyFdrs) %in% EssentialGenes$gene)]
hist(MannWhitneyFdrs.essential, breaks = seq(from = 0, to = 1, length = 101), col = "grey")

sum(MannWhitneyFdrs < 0.1)
## [1] 1383
sum(MannWhitneyFdrs.essential < 0.1)
## [1] 355
sum(MannWhitneyFdrs.essential < 0.1 & EssentialGenes$essential == 1)
## [1] 210
sum(MannWhitneyFdrs < 0.2)
## [1] 1894
sum(MannWhitneyFdrs.essential < 0.2)
## [1] 397
sum(MannWhitneyFdrs.essential < 0.2 & EssentialGenes$essential == 1)
## [1] 229
MannWhitneyFdrs.essential.roc = roc(EssentialGenes$essential, MannWhitneyFdrs.essential)
MannWhitneyFdrs.essential.roc
## 
## Call:
## roc.default(response = EssentialGenes$essential, predictor = MannWhitneyFdrs.essential)
## 
## Data: MannWhitneyFdrs.essential in 460 controls (EssentialGenes$essential 0) < 679 cases (EssentialGenes$essential 1).
## Area under the curve: 0.5185
length(intersect(names(MannWhitneyFdrs.essential)[which(MannWhitneyFdrs.essential < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]))
## [1] 325
top3MannWhitneyFdrs.essential = top3MannWhitneyFdrs[which(names(top3MannWhitneyFdrs) %in% EssentialGenes$gene)]

hist(top3MannWhitneyFdrs.essential, breaks = seq(from = 0, to = 1, length = 101), col = "grey")

sum(top3MannWhitneyFdrs < 0.1)
## [1] 3550
sum(top3MannWhitneyFdrs.essential < 0.1)
## [1] 555
sum(top3MannWhitneyFdrs.essential < 0.1 & EssentialGenes$essential == 1)
## [1] 333
sum(top3MannWhitneyFdrs < 0.2)
## [1] 5342
sum(top3MannWhitneyFdrs.essential < 0.2)
## [1] 644
sum(top3MannWhitneyFdrs.essential < 0.2 & EssentialGenes$essential == 1)
## [1] 392
top3MannWhitneyFdrs.essential.roc = roc(EssentialGenes$essential, top3MannWhitneyFdrs.essential)
top3MannWhitneyFdrs.essential.roc
## 
## Call:
## roc.default(response = EssentialGenes$essential, predictor = top3MannWhitneyFdrs.essential)
## 
## Data: top3MannWhitneyFdrs.essential in 460 controls (EssentialGenes$essential 0) > 679 cases (EssentialGenes$essential 1).
## Area under the curve: 0.5113
length(intersect(names(top3MannWhitneyFdrs.essential)[which(top3MannWhitneyFdrs.essential < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]))
## [1] 426
length(intersect(names(MannWhitneyFdrs.essential)[which(MannWhitneyFdrs.essential < 0.1)], intersect(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)])))
## [1] 242
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores = data.frame(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores, estimatedFdr = sapply(1:dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores)[1], function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$locfdr[i])])) )
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0Normalmix$genes %in% EssentialGenes$gene)
## [1] 1139
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores[match(EssentialGenes$gene, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores$gene), ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential)
## [1] 1139    3
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential)
##          gene    locfdr estimatedFdr
## AARS     AARS 0.0000000    0.0000000
## ABCE1   ABCE1 0.0000000    0.0000000
## ABCF1   ABCF1 0.0000000    0.0000000
## ACTB     ACTB 0.7531346    0.1375928
## ACTL6A ACTL6A 0.0000000    0.0000000
## ACTR10 ACTR10 0.0000000    0.0000000
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$estimatedFdr < 0.1)
## [1] 608
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$estimatedFdr < 0.1)
## [1] 608
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$estimatedFdr < 0.2)
## [1] 645
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$estimatedFdr < 0.2)
## [1] 645
hist(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$estimatedFdr, breaks = seq(from = 0, to = 1, length = 101), col = "grey")

WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential.roc = roc(EssentialGenes$essential, WeissmanCRISPRiRicinTreatmentUntreatedVsDay0NormalmixScores.essential$locfdr)
mageckGenes = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$condition.fdr < 0.1)]
length(mageckGenes)
# CRISPhieRmixGenes = EssentialGenes$gene[which(estimatedFdr < 0.1 & EssentialGenes$essential == 1)]
CRISPhieRmixGenes = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$estimatedFdr < 0.1)]
length(CRISPhieRmixGenes)
length(intersect(mageckGenes, CRISPhieRmixGenes))
GenesInCommon = intersect(mageckGenes, CRISPhieRmixGenes)
b = seq(from = min(log2fc) - 0.1, to = max(log2fc) + 0.1, length = 81)
hist(log2fc[which(geneIds %in% GenesInCommon)], breaks = b, col = "grey", probability = TRUE)
GenesCRISPhieRspecific = setdiff(CRISPhieRmixGenes, GenesInCommon)
length(GenesCRISPhieRspecific)
hist(log2fc[which(geneIds %in% GenesCRISPhieRspecific)], breaks = b, col = "grey", probability = TRUE)

hist(negCtrl, breaks = b, probability = TRUE, col = rgb(1, 0, 0, 0.6))
hist(log2fc[which(geneIds == GenesInCommon[1])], breaks = b, probability = TRUE, add = TRUE, col = rgb(0, 1, 0, 0.8))
legend("topright", legend = c("negCtrl", GenesInCommon[1]), col = c(rgb(1, 0, 0, 0.6), rgb(0, 0, 1, 0.8)), pch = 15, lty = 0, lwd = 0)
hist(log2fc[which(geneIds == GenesCRISPhieRspecific[1])], breaks = b, probability = TRUE, add = TRUE, col = rgb(0, 0, 1, 0.8))
legend("topright", legend = c("negCtrl", GenesInCommon[1], GenesCRISPhieRspecific[1]), col = c(rgb(1, 0, 0, 0.6), rgb(0, 1, 1, 0.8), rgb(0, 0, 1, 0.8)), pch = 15, lty = 0, lwd = 0)

hist(negCtrl, breaks = b, probability = TRUE, col = rgb(1, 0, 0, 0.6))
hist(log2fc[which(geneIds == GenesInCommon[1])], breaks = b, probability = TRUE, add = TRUE, col = rgb(0, 1, 0, 0.8))
legend("topright", legend = c("negCtrl", GenesInCommon[2]), col = c(rgb(1, 0, 0, 0.6), rgb(0, 0, 1, 0.8)), pch = 15, lty = 0, lwd = 0)
hist(log2fc[which(geneIds == GenesCRISPhieRspecific[2])], breaks = b, probability = TRUE, add = TRUE, col = rgb(0, 0, 1, 0.8))
legend("topright", legend = c("negCtrl", GenesInCommon[2], GenesCRISPhieRspecific[2]), col = c(rgb(1, 0, 0, 0.6), rgb(0, 1, 1, 0.8), rgb(0, 0, 1, 0.8)), pch = 15, lty = 0, lwd = 0)

WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle.essential$Gene == GenesCRISPhieRspecific[2]), ]

WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores.essential$gene == GenesCRISPhieRspecific[2]), ]

Intersection of all genes

estimatedFdrAll = sapply(1:length(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr), function(i) mean(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr <= WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$locfdr[i])]))
names(estimatedFdrAll) = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0CRISPhieRmixScores$gene
length(estimatedFdrAll)
## [1] 15975
head(estimatedFdrAll)
##          A2M       A4GALT         AACS         AATF       ABCA13 
## 7.993683e-01 8.390956e-01 6.082177e-01 4.129825e-06 8.187087e-01 
##        ABCA1 
## 5.824710e-01
WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle = WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle[match(names(estimatedFdrAll), WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$Gene),  ]
dim(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
## [1] 15975     8
head(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle)
##         Gene sgRNA condition.beta condition.z condition.p.value
## 6801     A2M    10      0.0209040     0.61278          0.372310
## 13709 A4GALT    20      0.0239440     1.00660          0.330370
## 14185   AACS    10     -0.0459290    -1.32040          0.728300
## 11453   AATF    10     -0.2994100    -8.68820          0.015574
## 5766  ABCA13    10      0.0565180     1.71710          0.057552
## 15152  ABCA1    10      0.0082852     0.24552          0.563520
##       condition.fdr condition.wald.p.value condition.wald.fdr
## 6801        0.86992             5.4002e-01         8.4492e-01
## 13709       0.84835             3.1414e-01         6.8688e-01
## 14185       0.96133             1.8671e-01         5.4222e-01
## 11453       0.22455             3.6836e-18         6.7328e-17
## 5766        0.46952             8.5960e-02         3.4599e-01
## 15152       0.92511             8.0605e-01         9.4383e-01
sum(estimatedFdrAll < 0.1)
## [1] 1425
sum(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.1)
## [1] 795
length(intersect(names(estimatedFdrAll)[which(estimatedFdrAll < 0.1)], WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$Gene[which(WeissmanCRISPRiRicinTreatmentUntreatedVsDay0MageckMle$condition.fdr < 0.1)]))
## [1] 669